1use super::pe;
3use crate::ext::io::*;
4use crate::ext::mutex::*;
5use crate::scripts::base::*;
6use crate::types::*;
7use crate::utils::encoding::*;
8use crate::utils::murmur2::*;
9use crate::utils::struct_pack::*;
10use crate::utils::threadpool::*;
11use anyhow::{Result, anyhow, bail};
12use clap::ValueEnum;
13use int_enum::IntEnum;
14use std::any::Any;
15use std::collections::HashMap;
16use std::hash::Hasher;
17use std::io::{Read, Seek, SeekFrom, Write};
18use std::num::NonZeroU64;
19use std::ops::DerefMut;
20use std::sync::{Arc, Mutex};
21
/// Builder that plugs the Yuris YPF archive format into the generic
/// `ScriptBuilder` machinery.
///
/// The builder is stateless, so it also implements [`Default`].
#[derive(Debug, Default)]
pub struct YpfBuilder {}

impl YpfBuilder {
    /// Creates a new (stateless) YPF builder.
    pub fn new() -> Self {
        Self {}
    }
}
30
31impl ScriptBuilder for YpfBuilder {
32 fn default_encoding(&self) -> Encoding {
33 Encoding::Cp932
34 }
35
36 fn default_archive_encoding(&self) -> Option<Encoding> {
37 Some(Encoding::Cp932)
38 }
39
40 fn build_script(
41 &self,
42 data: Vec<u8>,
43 _filename: &str,
44 _encoding: Encoding,
45 archive_encoding: Encoding,
46 config: &ExtraConfig,
47 _archive: Option<&Box<dyn Script>>,
48 ) -> Result<Box<dyn Script + Send + Sync>> {
49 let mut base_offset = 0;
50 if data.starts_with(b"MZ") {
51 base_offset = pe::get_base_offset(&data)?;
52 }
53 Ok(Box::new(YPF::new(
54 MemReader::new(data),
55 archive_encoding,
56 config,
57 base_offset,
58 )?))
59 }
60
61 fn build_script_from_file(
62 &self,
63 filename: &str,
64 _encoding: Encoding,
65 archive_encoding: Encoding,
66 config: &ExtraConfig,
67 _archive: Option<&Box<dyn Script>>,
68 ) -> Result<Box<dyn Script + Send + Sync>> {
69 if filename == "-" {
70 let data = crate::utils::files::read_file(filename)?;
71 let mut base_offset = 0;
72 if data.starts_with(b"MZ") {
73 base_offset = pe::get_base_offset(&data)?;
74 }
75 Ok(Box::new(YPF::new(
76 MemReader::new(data),
77 archive_encoding,
78 config,
79 base_offset,
80 )?))
81 } else {
82 let mut file = std::fs::File::open(filename)?;
83 let mut base_offset = 0;
84 if file.peek_and_equal(b"MZ").is_ok() {
85 let mp = pelite::FileMap::open(filename)?;
86 base_offset = pe::get_base_offset(&mp)?;
87 }
88 Ok(Box::new(YPF::new(
89 file,
90 archive_encoding,
91 config,
92 base_offset,
93 )?))
94 }
95 }
96
97 fn build_script_from_reader<'a>(
98 &self,
99 mut reader: Box<dyn ReadSeek + Send + Sync + 'a>,
100 _filename: &str,
101 _encoding: Encoding,
102 archive_encoding: Encoding,
103 config: &ExtraConfig,
104 _archive: Option<&Box<dyn Script>>,
105 ) -> Result<Box<dyn Script + Send + Sync + 'a>> {
106 let mut base_offset = 0;
107 if reader.peek_and_equal(b"MZ").is_ok() {
108 let mut data = Vec::new();
109 let pos = reader.stream_position()?;
110 reader.read_to_end(&mut data)?;
111 reader.seek(SeekFrom::Start(pos))?;
112 base_offset = pe::get_base_offset(&data)?;
113 }
114 Ok(Box::new(YPF::new(
115 reader,
116 archive_encoding,
117 config,
118 base_offset,
119 )?))
120 }
121
122 fn extensions(&self) -> &'static [&'static str] {
123 &["ypf", "exe"]
124 }
125
126 fn script_type(&self) -> &'static ScriptType {
127 &ScriptType::YurisYPF
128 }
129
130 fn is_this_format(&self, filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
131 if buf_len >= 4 && buf.starts_with(b"YPF\0") {
132 return Some(20);
133 }
134 if buf_len >= 2 && buf.starts_with(b"MZ") {
135 let p = std::path::Path::new(filename);
136 if p.exists() {
137 if let Ok(file) = pelite::FileMap::open(p) {
138 if pe::get_base_offset(&file).is_ok() {
139 return Some(20);
140 }
141 }
142 }
143 }
144 None
145 }
146
147 fn is_archive(&self) -> bool {
148 true
149 }
150
151 fn create_archive(
152 &self,
153 filename: &str,
154 files: &[&str],
155 encoding: Encoding,
156 config: &ExtraConfig,
157 ) -> Result<Box<dyn Archive>> {
158 let f = std::fs::File::create(filename)?;
159 let writer = std::io::BufWriter::new(f);
160 Ok(Box::new(YPFArchiveWriter::new(
161 writer, files, encoding, config,
162 )?))
163 }
164}
165
166#[repr(u8)]
167#[derive(Debug, IntEnum, Clone, Copy)]
168enum ResourceType {
169 Default,
170 BMP,
171 PNG,
172 JPG,
173 GIF,
174 WAV,
175 OGG,
176 PSD,
177 YCG,
178 PSB,
179 WAV_,
180 OGG_,
181 OPUS,
182}
183
184impl Default for ResourceType {
185 fn default() -> Self {
186 Self::Default
187 }
188}
189
190fn get_file_type(name: &str, use_new_file_type: bool) -> ResourceType {
195 let ext = name.rsplit('.').next().unwrap_or("").to_ascii_lowercase();
196 match ext.as_str() {
197 "bmp" => ResourceType::BMP,
198 "png" => ResourceType::PNG,
199 "jpg" | "jpeg" => ResourceType::JPG,
200 "gif" => ResourceType::GIF,
201 "ycg" => ResourceType::YCG,
202 "psb" => ResourceType::PSB,
203 "wav" => {
204 if use_new_file_type {
205 ResourceType::WAV_
206 } else {
207 ResourceType::WAV
208 }
209 }
210 "ogg" => {
211 if use_new_file_type {
212 ResourceType::OGG_
213 } else {
214 ResourceType::OGG
215 }
216 }
217 "psd" => ResourceType::PSD,
218 "opus" => ResourceType::OPUS,
219 _ => ResourceType::Default,
220 }
221}
222
223#[derive(Clone, Debug)]
224struct YPFEntry {
225 name_hash: u32,
226 name: String,
227 typ: ResourceType,
228 compressed: bool,
229 size: u32,
230 compressed_size: u32,
231 offset: u64,
232 hash: Option<u32>,
233}
234
235fn get_info_as_version(info: &Option<Box<dyn Any>>) -> Result<u32> {
236 Ok(*info
237 .as_ref()
238 .ok_or_else(|| anyhow::anyhow!("info not found"))?
239 .downcast_ref()
240 .ok_or_else(|| anyhow::anyhow!("not YSTBHeader"))?)
241}
242
243impl StructPack for YPFEntry {
244 fn pack<W: Write>(
245 &self,
246 writer: &mut W,
247 big: bool,
248 encoding: Encoding,
249 info: &Option<Box<dyn std::any::Any>>,
250 ) -> Result<()> {
251 let version = get_info_as_version(info)?;
252 self.name_hash.pack(writer, big, encoding, info)?;
253 let table = if version < 500 {
254 &NAME_DEFAULT_TABLE
255 } else {
256 &NAME_V500_TABLE
257 };
258 let mut name = encode_string(encoding, &self.name, true)?;
259 if name.len() > 0xFF {
260 bail!("File name can not longer than 255 bytes.");
261 }
262 let name_len = name.len() as u8;
263 let name_len = (table
264 .iter()
265 .position(|s| *s == name_len)
266 .ok_or_else(|| anyhow!("No suitable len found in table"))?
267 as u8)
268 ^ 0xFF;
269 name_len.pack(writer, big, encoding, info)?;
270 for num in name.iter_mut() {
271 *num ^= match version {
272 290 => 64,
273 500 => 54,
274 _ => 0,
275 };
276 *num = !(*num);
277 }
278 writer.write_all(&name)?;
279 (self.typ as u8).pack(writer, big, encoding, info)?;
280 self.compressed.pack(writer, big, encoding, info)?;
281 self.size.pack(writer, big, encoding, info)?;
282 self.compressed_size.pack(writer, big, encoding, info)?;
283 if version >= 480 {
284 self.offset.pack(writer, big, encoding, info)?;
285 } else {
286 (self.offset as u32).pack(writer, big, encoding, info)?;
287 };
288 if version >= 473 {
289 let hash = self.hash.ok_or_else(|| anyhow!("hash not specified."))?;
290 hash.pack(writer, big, encoding, info)?;
291 }
292 Ok(())
293 }
294}
295
296#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
297pub enum NameHashType {
298 Crc32,
300 Murmur2,
302}
303
304impl Default for NameHashType {
305 fn default() -> Self {
306 Self::Murmur2
307 }
308}
309
310#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
311pub enum DataHashType {
312 Adler32,
314 Murmur2,
316 Xxh32,
318}
319
320impl Default for DataHashType {
321 fn default() -> Self {
322 Self::Murmur2
323 }
324}
325
326#[derive(Debug)]
327pub struct YPF<'a, T: Read + Seek + std::fmt::Debug + 'a> {
328 #[allow(unused)]
329 version: u32,
330 entries: Vec<YPFEntry>,
331 reader: Arc<Mutex<T>>,
332 _mark: std::marker::PhantomData<&'a ()>,
333}
334
/// Name-length substitution table used for versions below 500.
///
/// The table is the identity mapping on `0..=255` except for the index pairs
/// listed in `SWAPS`, whose values are exchanged.
const NAME_DEFAULT_TABLE: [u8; 256] = {
    const SWAPS: [(usize, usize); 12] = [
        (3, 72),
        (6, 53),
        (9, 11),
        (12, 16),
        (13, 19),
        (17, 25),
        (21, 27),
        (28, 30),
        (32, 35),
        (38, 41),
        (44, 47),
        (46, 50),
    ];
    let mut table = [0u8; 256];
    let mut i = 0;
    while i < table.len() {
        table[i] = i as u8;
        i += 1;
    }
    let mut k = 0;
    while k < SWAPS.len() {
        let (a, b) = SWAPS[k];
        table[a] = b as u8;
        table[b] = a as u8;
        k += 1;
    }
    table
};
350
/// Name-length substitution table used for version 500 and above.
///
/// The table is the identity mapping on `0..=255` except for the index pairs
/// listed in `SWAPS`, whose values are exchanged.
const NAME_V500_TABLE: [u8; 256] = {
    const SWAPS: [(usize, usize); 12] = [
        (3, 10),
        (6, 53),
        (9, 11),
        (12, 16),
        (13, 19),
        (17, 24),
        (20, 46),
        (21, 27),
        (28, 30),
        (32, 35),
        (38, 41),
        (44, 47),
    ];
    let mut table = [0u8; 256];
    let mut i = 0;
    while i < table.len() {
        table[i] = i as u8;
        i += 1;
    }
    let mut k = 0;
    while k < SWAPS.len() {
        let (a, b) = SWAPS[k];
        table[a] = b as u8;
        table[b] = a as u8;
        k += 1;
    }
    table
};
366
367fn detect_hash(name: &[u8], expected: u32) -> Result<NameHashType> {
368 let mut hasher = StreamingMurmur2::new(0, name.len() as u32);
369 hasher.write(name);
370 if hasher.finish() as u32 == expected {
371 return Ok(NameHashType::Murmur2);
372 }
373 if crc32fast::hash(name) == expected {
374 return Ok(NameHashType::Crc32);
375 }
376 bail!("Unknown hash type or checksum/name is invalid/broken")
377}
378
379fn detect_data_hash<T: Read + Seek>(
380 mut stream: T,
381 size: u32,
382 expected: u32,
383) -> Result<DataHashType> {
384 let mut murmur2_hasher = StreamingMurmur2::new(0, size);
385 let mut adler32_hasher = adler::Adler32::new();
386 let mut xxh32_hasher = Xxh32::new(0);
387 let mut buf = [0; 1024];
388 loop {
389 let readed = stream.read(&mut buf)?;
390 if readed == 0 {
391 break;
392 }
393 let b = &buf[..readed];
394 murmur2_hasher.write(b);
395 adler32_hasher.write(b);
396 xxh32_hasher.write(b);
397 }
398 if murmur2_hasher.finish() as u32 == expected {
399 return Ok(DataHashType::Murmur2);
400 }
401 if adler32_hasher.finish() as u32 == expected {
402 return Ok(DataHashType::Adler32);
403 }
404 if xxh32_hasher.finish() as u32 == expected {
405 return Ok(DataHashType::Xxh32);
406 }
407 bail!("Unknown hash type or checksum/data is invalid/broken")
408}
409
410fn cal_name_hash(name: &[u8], typ: NameHashType) -> u32 {
411 match typ {
412 NameHashType::Crc32 => crc32fast::hash(name),
413 NameHashType::Murmur2 => {
414 let mut hasher = StreamingMurmur2::new(0, name.len() as u32);
415 hasher.write(name);
416 hasher.finish() as u32
417 }
418 }
419}
420
421impl<'b, T: Read + Seek + std::fmt::Debug + Send + Sync + 'b> YPF<'b, T> {
422 pub fn new(
423 mut reader: T,
424 archive_encoding: Encoding,
425 config: &ExtraConfig,
426 base_offset: u64,
427 ) -> Result<Self> {
428 if base_offset > 0 {
429 reader.seek(SeekFrom::Start(base_offset))?;
430 }
431 let mut header = [0u8; 4];
432 reader.read_exact(&mut header)?;
433 if &header != b"YPF\0" {
434 bail!("Invalid YPF archive header")
435 }
436 let version = reader.read_u32()?;
437 if !matches!(version, 234..=500) {
438 bail!("Unsupported YPF engine version: {}", version);
439 }
440 eprintln!("Yuris YPF engine version: {version}");
441 let count = reader.read_u32()?;
442 let index_size = reader.read_u32()?;
443 let mut entries = Vec::with_capacity(count as usize);
444 let table = if version < 500 {
445 &NAME_DEFAULT_TABLE
446 } else {
447 &NAME_V500_TABLE
448 };
449 let mut hash_type = None;
450 {
451 let mut index = StreamRegion::new(&mut reader, 0x20, index_size as u64)?;
452 for _ in 0..count {
453 let hash = index.read_u32()?;
454 let length = table[(index.read_u8()? ^ 0xff) as usize];
455 let mut name = index.read_exact_vec(length as usize)?;
456 for num in name.iter_mut() {
457 *num = !(*num);
458 *num ^= match version {
459 290 => 64,
460 500 => 54,
461 _ => 0,
462 };
463 }
464 if config.yuris_check_hash {
465 if let Some(hash_type) = hash_type {
466 let thash = cal_name_hash(&name, hash_type);
467 if hash != thash {
468 let name = decode_to_string(archive_encoding, &name, false)?;
469 bail!(
470 "checksum/name is invalid/broken for {name}. expected hash: {hash:08X}, actual: {thash:08X}"
471 );
472 }
473 } else {
474 let typ = detect_hash(&name, hash)?;
475 eprintln!("Detected name hash type: {:?}", typ);
476 hash_type = Some(typ);
477 }
478 }
479 let name = decode_to_string(archive_encoding, &name, true)?;
480 entries.push(YPFEntry {
481 name_hash: hash,
482 name: name.clone(),
483 typ: index
484 .read_u8()?
485 .try_into()
486 .map_err(|e| anyhow!("Unknown entry type for {name}: {}", e))?,
487 compressed: index.read_u8()? != 0,
488 size: index.read_u32()?,
489 compressed_size: index.read_u32()?,
490 offset: if version >= 480 {
491 index.read_u64()?
492 } else {
493 index.read_u32()? as u64
494 },
495 hash: if version >= 473 {
496 Some(index.read_u32()?)
497 } else {
498 None
499 },
500 })
501 }
502 }
503 if config.yuris_debug_archive {
504 println!("Entries in yuris YPF: {:#?}", entries);
505 let _ = std::io::stdout().flush();
506 }
507 if config.yuris_check_hash {
508 let mut data_hash_type = None;
509 for entry in &entries {
510 let hash = match entry.hash {
511 Some(hash) if hash != 0 => hash,
512 _ => continue,
513 };
514 let mut stream = StreamRegion::new(
515 &mut reader,
516 entry.offset,
517 entry.offset + entry.compressed_size as u64,
518 )?;
519 if let Some(hash_type) = data_hash_type {
520 let mut hasher: Box<dyn Hasher> = match hash_type {
521 DataHashType::Adler32 => Box::new(adler::Adler32::new()),
522 DataHashType::Murmur2 => {
523 Box::new(StreamingMurmur2::new(0, entry.compressed_size))
524 }
525 DataHashType::Xxh32 => Box::new(Xxh32::new(0)),
526 };
527 let mut buf = [0; 1024];
528 loop {
529 let readed = stream.read(&mut buf)?;
530 if readed == 0 {
531 break;
532 }
533 hasher.write(&buf[..readed]);
534 }
535 let thash = hasher.finish() as u32;
536 if thash != hash {
537 bail!(
538 "checksum/data is invalid/broken for {}. expected hash: {hash:08X}, actual: {thash:08X}",
539 entry.name
540 );
541 }
542 } else {
543 let typ = detect_data_hash(stream, entry.compressed_size, hash)?;
544 eprintln!("Detected data hash type: {:?}", typ);
545 data_hash_type = Some(typ);
546 }
547 }
548 }
549 Ok(Self {
550 version,
551 entries,
552 reader: Arc::new(Mutex::new(reader)),
553 _mark: std::marker::PhantomData,
554 })
555 }
556}
557
558impl<'b, T: Read + Seek + std::fmt::Debug + Send + Sync + 'b> Script for YPF<'b, T> {
559 fn default_output_script_type(&self) -> OutputScriptType {
560 OutputScriptType::Json
561 }
562
563 fn default_format_type(&self) -> FormatOptions {
564 FormatOptions::None
565 }
566
567 fn is_archive(&self) -> bool {
568 true
569 }
570
571 fn iter_archive_filename<'a>(
572 &'a self,
573 ) -> Result<Box<dyn Iterator<Item = Result<String>> + 'a>> {
574 Ok(Box::new(self.entries.iter().map(|s| Ok(s.name.clone()))))
575 }
576
577 fn iter_archive_offset<'a>(&'a self) -> Result<Box<dyn Iterator<Item = Result<u64>> + 'a>> {
578 Ok(Box::new(self.entries.iter().map(|s| Ok(s.offset))))
579 }
580
581 fn open_file<'a>(&'a self, index: usize) -> Result<Box<dyn ArchiveContent + Send + Sync + 'a>> {
582 let entry = self
583 .entries
584 .get(index)
585 .ok_or_else(|| anyhow!("index out of bound"))?;
586 let mut entry = Entry {
587 entry,
588 stream: StreamRegion::with_size(
589 MutexWrapper::new(self.reader.clone(), entry.offset),
590 entry.compressed_size as u64,
591 )?,
592 cache: Mutex::new(None),
593 pos: 0,
594 script_type: None,
595 };
596 let mut buf = [0; 0x20];
597 let readed = entry.read(&mut buf)?;
598 entry.rewind()?;
599 entry.script_type = detect_script_type(&entry.entry.name, readed, &buf);
600 Ok(Box::new(entry))
601 }
602}
603
604fn detect_script_type(_filename: &str, buf_len: usize, buf: &[u8]) -> Option<ScriptType> {
605 if buf_len >= 4 {
606 if buf.starts_with(b"YSCF") {
607 return Some(ScriptType::YurisYSCFG);
608 }
609 if buf.starts_with(b"YSCM") {
610 return Some(ScriptType::YurisYSCM);
611 }
612 if buf.starts_with(b"YSER") {
613 return Some(ScriptType::YurisYSER);
614 }
615 if buf.starts_with(b"YSLB") {
616 return Some(ScriptType::YurisYSLB);
617 }
618 if buf.starts_with(b"YSTB") {
619 return Some(ScriptType::YurisYSTB);
620 }
621 if buf.starts_with(b"YSTD") {
622 return Some(ScriptType::YurisYSTD);
623 }
624 if buf.starts_with(b"YSTL") {
625 return Some(ScriptType::YurisYSTL);
626 }
627 if buf.starts_with(b"YSVR") {
628 return Some(ScriptType::YurisYSVR);
629 }
630 }
631 #[cfg(feature = "yuris-img")]
632 if buf_len >= 12 && buf.starts_with(b"YDG\0YU-RIS\0\0") {
633 return Some(ScriptType::YurisYDG);
634 }
635 None
636}
637
638#[derive(Debug)]
639struct Entry<'a, T: Read + Seek + std::fmt::Debug + Send + Sync + 'a> {
640 entry: &'a YPFEntry,
641 stream: StreamRegion<MutexWrapper<T>>,
642 cache: Mutex<Option<Box<dyn ReadDebug + Send + Sync + 'a>>>,
643 pos: u64,
644 script_type: Option<ScriptType>,
645}
646
647impl<'b, T: Read + Seek + std::fmt::Debug + Send + Sync + 'b> ArchiveContent for Entry<'b, T> {
648 fn name(&self) -> &str {
649 &self.entry.name
650 }
651
652 fn size(&self) -> Option<u64> {
653 Some(self.entry.compressed_size as u64)
654 }
655
656 fn script_type(&self) -> Option<&ScriptType> {
657 self.script_type.as_ref()
658 }
659
660 fn to_data<'a>(&'a mut self) -> Result<Box<dyn ReadSeek + Send + Sync + 'a>> {
661 Ok(Box::new(self))
662 }
663}
664
665impl<'a, T: Read + Seek + std::fmt::Debug + Send + Sync + 'a> Read for Entry<'a, T> {
666 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
667 if self.entry.compressed {
668 let mut lock = self.cache.lock().map_err(|_| {
669 std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
670 })?;
671 if let Some(cache) = lock.as_mut() {
672 let readed = cache.read(buf)?;
673 self.pos += readed as u64;
674 return Ok(readed);
675 }
676 self.stream.rewind()?;
677 let mut cache = Box::new(flate2::read::ZlibDecoder::new(self.stream.clone()))
678 as Box<dyn ReadDebug + Send + Sync + 'a>;
679 if self.pos > 0 {
680 cache.skip(self.pos)?;
681 }
682 let readed = cache.read(buf)?;
683 self.pos += readed as u64;
684 lock.replace(cache);
685 Ok(readed)
686 } else {
687 self.stream.read(buf)
688 }
689 }
690}
691
692impl<'a, T: Read + Seek + std::fmt::Debug + Send + Sync + 'a> Seek for Entry<'a, T> {
693 fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
694 if self.entry.compressed {
695 let new_pos = match pos {
696 SeekFrom::Start(p) => p,
697 SeekFrom::End(offset) => {
698 if offset < 0 {
699 if (-offset) as u64 > self.entry.size as u64 {
700 return Err(std::io::Error::new(
701 std::io::ErrorKind::InvalidInput,
702 "Seek from end exceeds file length",
703 ));
704 }
705 self.entry.size as u64 - (-offset) as u64
706 } else {
707 self.entry.size as u64 + offset as u64
708 }
709 }
710 SeekFrom::Current(offset) => {
711 if offset < 0 {
712 if (-offset) as u64 > self.pos {
713 return Err(std::io::Error::new(
714 std::io::ErrorKind::InvalidInput,
715 "Seek from current exceeds file start",
716 ));
717 }
718 self.pos - (-offset) as u64
719 } else {
720 self.pos + offset as u64
721 }
722 }
723 };
724 let mut lock = self.cache.lock().map_err(|_| {
725 std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
726 })?;
727 if let Some(cache) = lock.as_mut()
728 && self.pos <= new_pos
729 {
730 let to_skip = new_pos - self.pos;
731 if to_skip > 0 {
732 cache.skip(to_skip)?;
733 }
734 self.pos = new_pos;
735 Ok(new_pos)
736 } else {
737 lock.take();
738 self.pos = new_pos;
739 Ok(new_pos)
740 }
741 } else {
742 self.stream.seek(pos)
743 }
744 }
745
746 fn stream_position(&mut self) -> std::io::Result<u64> {
747 if self.entry.compressed {
748 Ok(self.pos)
749 } else {
750 self.stream.stream_position()
751 }
752 }
753}
754
755pub struct Xxh32 {
756 inner: xxhash_rust::xxh32::Xxh32,
757}
758
759impl Xxh32 {
760 pub fn new(seed: u32) -> Self {
761 Self {
762 inner: xxhash_rust::xxh32::Xxh32::new(seed),
763 }
764 }
765}
766
767impl Hasher for Xxh32 {
768 fn write(&mut self, bytes: &[u8]) {
769 self.inner.update(bytes);
770 }
771 fn finish(&self) -> u64 {
772 self.inner.digest() as u64
773 }
774}
775
776pub struct YPFArchiveWriter<T: Write + Seek> {
777 writer: Arc<Mutex<T>>,
778 headers: Arc<Mutex<HashMap<String, YPFEntry>>>,
779 version: u32,
780 compress: bool,
781 zopfli: bool,
782 compress_level: u32,
783 zopfli_iteration_count: NonZeroU64,
784 zopfli_iterations_without_improvement: NonZeroU64,
785 zopfli_maximum_block_splits: u16,
786 runner: ThreadPool<Result<()>>,
787 data_hash: DataHashType,
788 encoding: Encoding,
789}
790
791impl<T: Write + Seek> YPFArchiveWriter<T> {
792 pub fn new(
799 mut writer: T,
800 files: &[&str],
801 encoding: Encoding,
802 config: &ExtraConfig,
803 ) -> Result<Self> {
804 writer.write_all(b"YPF\0")?;
805 let version = config.yuris_ypf_version.ok_or_else(|| {
806 anyhow!("Version is required. Use --yuris-ypf-version to specify version.")
807 })?;
808 writer.write_u32(version)?;
809 let file_count = files.len() as u32;
810 writer.write_u32(file_count)?;
811 writer.write_u32(0)?; writer.write_u128(0)?; let mut headers = HashMap::new();
814 let info = &Some(Box::new(version) as Box<dyn Any>);
815 for file in files {
816 let name = encode_string(encoding, file, true)?;
817 let mut hasher: Box<dyn Hasher> = match config.yuris_name_hash_type {
818 NameHashType::Crc32 => Box::new(crc32fast::Hasher::new()),
819 NameHashType::Murmur2 => Box::new(StreamingMurmur2::new(0, name.len() as u32)),
820 };
821 hasher.write(&name);
822 let header = YPFEntry {
823 name_hash: hasher.finish() as u32,
824 name: file.to_string(),
825 typ: get_file_type(file, config.yuris_use_new_file_type),
826 compressed: config.yuris_ypf_compress_file,
827 size: 0,
828 compressed_size: 0,
829 offset: 0,
830 hash: if version >= 473 { Some(0) } else { None },
831 };
832 header.pack(&mut writer, false, encoding, info)?;
833 headers.insert(file.to_string(), header);
834 }
835 let header_size = writer.stream_position()?;
836 writer.write_u32_at(12, header_size as u32)?;
837 Ok(Self {
838 writer: Arc::new(Mutex::new(writer)),
839 headers: Arc::new(Mutex::new(headers)),
840 version,
841 compress: config.yuris_ypf_compress_file,
842 zopfli: config.yuris_ypf_zopfli,
843 compress_level: config.zlib_compression_level,
844 zopfli_iteration_count: config.zopfli_iteration_count,
845 zopfli_iterations_without_improvement: config.zopfli_iterations_without_improvement,
846 zopfli_maximum_block_splits: config.zopfli_maximum_block_splits,
847 runner: ThreadPool::new(
848 if config.yuris_ypf_compress_file {
849 config.yuris_ypf_workers
850 } else {
851 1
852 },
853 Some("yuris-ypf-writer"),
854 false,
855 )?,
856 encoding,
857 data_hash: config.yuris_data_hash_type,
858 })
859 }
860
861 fn create_hasher(&self, length: u32) -> Box<dyn Hasher + Send + Sync> {
862 match self.data_hash {
863 DataHashType::Adler32 => Box::new(adler::Adler32::new()),
864 DataHashType::Murmur2 => Box::new(StreamingMurmur2::new(0, length)),
865 DataHashType::Xxh32 => Box::new(Xxh32::new(0)),
866 }
867 }
868
869 fn create_hasher2(&self) -> Box<dyn Hasher + Send + Sync> {
870 match self.data_hash {
871 DataHashType::Adler32 => Box::new(adler::Adler32::new()),
872 DataHashType::Murmur2 => Box::new(Murmur2::new(0)),
873 DataHashType::Xxh32 => Box::new(Xxh32::new(0)),
874 }
875 }
876}
877
878impl<T: Write + Seek + Send + Sync + 'static> Archive for YPFArchiveWriter<T> {
879 fn new_file<'a>(
880 &'a mut self,
881 name: &str,
882 size: Option<u64>,
883 ) -> Result<Box<dyn WriteSeek + 'a>> {
884 let inner = self.new_file_non_seek(name, size)?;
885 Ok(Box::new(Writer {
886 inner,
887 mem: MemWriter::new(),
888 }))
889 }
890
891 fn new_file_non_seek<'a>(
892 &'a mut self,
893 name: &str,
894 size: Option<u64>,
895 ) -> Result<Box<dyn Write + 'a>> {
896 let mut entry = self
897 .headers
898 .lock_blocking()
899 .get(name)
900 .ok_or_else(|| anyhow::anyhow!("File '{}' not found in archive", name))?
901 .clone();
902 if self.compress {
903 let (reader, writer) = std::io::pipe()?;
904 let file = self.writer.clone();
905 let headers = self.headers.clone();
906 let compress_level = self.compress_level;
907 let name = name.to_owned();
908 let zopfli = self.zopfli;
909 let iteration_count = self.zopfli_iteration_count;
910 let iterations_without_improvement = self.zopfli_iterations_without_improvement;
911 let maximum_block_splits = self.zopfli_maximum_block_splits;
912 let data_hash = self.data_hash;
913 self.runner.execute(
914 move |_| {
915 let mut tsize = 0;
916 let mut reader = TrackStream::new(reader, &mut tsize);
917 let mut data = Vec::new();
918 reader.read_to_end(&mut data)?;
919 if entry.compressed {
920 let mut compressed = MemWriter::new();
921 if zopfli {
922 let mut encoder = zopfli::ZlibEncoder::new(
923 zopfli::Options {
924 iteration_count,
925 iterations_without_improvement,
926 maximum_block_splits,
927 },
928 zopfli::BlockType::Dynamic,
929 &mut compressed,
930 )?;
931 encoder.write_all(&data)?;
933 encoder.finish()?;
934 } else {
935 let mut encoder = flate2::write::ZlibEncoder::new(
936 &mut compressed,
937 flate2::Compression::new(compress_level),
938 );
939 encoder.write_all(&data)?;
941 encoder.finish()?;
942 }
943 data = compressed.into_inner();
944 }
945 entry.size = tsize as u32;
946 entry.compressed_size = data.len() as u32;
947 if let Some(hash) = entry.hash.as_mut() {
948 let mut hasher: Box<dyn Hasher> = match data_hash {
949 DataHashType::Adler32 => Box::new(adler::Adler32::new()),
950 DataHashType::Murmur2 => {
951 Box::new(StreamingMurmur2::new(0, entry.compressed_size))
952 }
953 DataHashType::Xxh32 => Box::new(Xxh32::new(0)),
954 };
955 hasher.write(&data);
956 *hash = hasher.finish() as u32;
957 }
958 let mut writer = file.lock_blocking();
959 entry.offset = writer.seek(SeekFrom::End(0))?;
960 writer.write_all(&data)?;
961 headers.lock_blocking().insert(name, entry);
962 Ok(())
963 },
964 true,
965 )?;
966 Ok(Box::new(writer))
967 } else {
968 let mut writer = self.writer.lock_blocking();
969 entry.offset = writer.seek(SeekFrom::End(0))?;
970 Ok(Box::new(YPFArchiveFile {
971 entry,
972 writer: self.writer.clone(),
973 pos: 0,
974 headers: self.headers.clone(),
975 hasher: if let Some(size) = size {
976 self.create_hasher(size as u32)
977 } else {
978 self.create_hasher2()
979 },
980 }))
981 }
982 }
983
984 fn write_header(&mut self) -> Result<()> {
985 self.runner.join();
986 for err in self.runner.take_results() {
987 err?;
988 }
989 let mut writer = self.writer.lock_blocking();
990 let headers = self.headers.lock_blocking();
991 writer.seek(SeekFrom::Start(0x20))?;
992 let mut files = headers.iter().map(|(_, d)| d).collect::<Vec<_>>();
993 files.sort_by_key(|f| f.offset);
994 let info = &Some(Box::new(self.version) as Box<dyn Any>);
995 for file in files {
996 file.pack(writer.deref_mut(), false, self.encoding, info)?;
997 }
998 Ok(())
999 }
1000}
1001
1002struct YPFArchiveFile<T: Write + Seek> {
1003 entry: YPFEntry,
1004 writer: Arc<Mutex<T>>,
1005 pos: usize,
1006 headers: Arc<Mutex<HashMap<String, YPFEntry>>>,
1007 hasher: Box<dyn Hasher + Send + Sync>,
1008}
1009
1010impl<T: Write + Seek> Write for YPFArchiveFile<T> {
1011 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1012 let mut writer = self.writer.lock().map_err(|_| {
1013 std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
1014 })?;
1015 writer.seek(SeekFrom::Start(self.entry.offset + self.pos as u64))?;
1016 let bytes_written = writer.write(buf)?;
1017 self.pos += bytes_written;
1018 self.entry.size = self.entry.size.max(self.pos as u32);
1019 self.hasher.write(&buf[..bytes_written]);
1020 Ok(bytes_written)
1021 }
1022
1023 fn flush(&mut self) -> std::io::Result<()> {
1024 self.writer
1025 .lock()
1026 .map_err(|_| {
1027 std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
1028 })?
1029 .flush()
1030 }
1031}
1032
1033impl<T: Write + Seek> Drop for YPFArchiveFile<T> {
1034 fn drop(&mut self) {
1035 self.entry.compressed_size = self.entry.size;
1036 if let Some(hash) = self.entry.hash.as_mut() {
1037 *hash = self.hasher.finish() as u32;
1038 }
1039 self.headers
1040 .lock_blocking()
1041 .insert(self.entry.name.clone(), self.entry.clone());
1042 }
1043}
1044
1045struct Writer<'a> {
1046 inner: Box<dyn Write + 'a>,
1047 mem: MemWriter,
1048}
1049
1050impl std::fmt::Debug for Writer<'_> {
1051 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1052 f.debug_struct("Writer").field("mem", &self.mem).finish()
1053 }
1054}
1055
1056impl<'a> Write for Writer<'a> {
1057 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1058 self.mem.write(buf)
1059 }
1060
1061 fn flush(&mut self) -> std::io::Result<()> {
1062 self.mem.flush()
1063 }
1064}
1065
1066impl<'a> Seek for Writer<'a> {
1067 fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
1068 self.mem.seek(pos)
1069 }
1070
1071 fn stream_position(&mut self) -> std::io::Result<u64> {
1072 self.mem.stream_position()
1073 }
1074
1075 fn rewind(&mut self) -> std::io::Result<()> {
1076 self.mem.rewind()
1077 }
1078}
1079
1080impl<'a> Drop for Writer<'a> {
1081 fn drop(&mut self) {
1082 let _ = self.inner.write_all(&self.mem.data);
1083 let _ = self.inner.flush();
1084 }
1085}